#   Filename: jurisdictionLD_source.R
#   Description: Source script to produce results for models of "ratifyrome using the unimputed data set with listwise deletion, shown in the supplementary appendix. Called from jurisdiction_driver.R.
# 	Author: Barry Hashimoto
# 	Date: December 2019
# 	For: Barry Hashimoto, "Autocratic Consent to International Law: the Case of the International Criminal Court's Jurisdiction," International Organization.

############################################################################################################
# Prepare data for analysis.

# Rename the **unimputed data set** to `data'.
data <- NULL
data <- all

# Rename certain variables so that code from an earlier version of this files can remain unmodified.
   data$ruleoflaw <- data$vdem.ruleoflaw

# Legal tradition dummies.
  suppressPackageStartupMessages(library(caret, quietly = T))
   	   legaldummies <- as.data.frame(predict(dummyVars(~ as.factor(legaltrad2), data = data), 
   	   newdata = data))
       names(legaldummies) <- c("civil", "common", "mixed", "islam")
       data <- data.frame(data, legaldummies)

# Drop all post-ratification observations to correctly estimate the pooled event history logits and calculate country-specific time-trends estimated from cubic polynomials.
  data <- data[is.na(data$ratifyrome)==F,]  
  data$ccode <- factor(data$ccode)
  data$ratify.trend.single <- predict(lm(ratifyrome ~ poly(quartersinoffice.since1998Q3, 3), data = data))
  data$ratify.trend.country <- predict(lm(ratifyrome ~ ccode*poly(quartersinoffice.since1998Q3, 3), data = data)) 	    
  
# Identify the OECD DAC States so they can be dropped
  dacmemberlist <- c("Australia", "Austria", "Belgium", "Canada", "Denmark", "Finland", "France", "Germany", 
    "Greece", "Ireland", "Italy", "Japan", "South Korea", "Luxembourg", "Netherlands", "New Zealand", "Norway", 
    "Portugal", "Spain", "Sweden", "Switzerland", "United Kingdom", "United States of America")
    
# Exclude DAC member states
  data <- data[ifelse(data$country %in% dacmemberlist, 1, 0)==0,]
  
# Drop all observations before 1998(3)
  data <- data[data$quarter>=154, ]

  
############################################################################################################
# Models where the dependent variable is "ratifyrome" using the unimputed data set.

suppressPackageStartupMessages(library(brglm, quietly = T)) # Bias-reduced logit estimation appropriate for cases of separation due to regional and year dummies.

model1 <-  brglm(formula = ratifyrome ~ as.factor(region) + as.factor(year) + ratify.trend.single
  + common + mixed + islam + gdp.scaled + ruleoflaw + pre98conflict + ptssum.mean + medianIMR
  + democracy*capital.scaled, family = binomial(link = "logit"), method = "brglm.fit", data = data)
model2 <-  brglm(formula = ratifyrome ~ as.factor(region) + as.factor(year) + ratify.trend.single
  + common + mixed + islam + gdp.scaled + ruleoflaw + pre98conflict  + ptssum.mean + medianIMR
  + democracy*demaid.scaled, family = binomial(link = "logit"), method = "brglm.fit", data = data)  
model3 <-  brglm(formula = ratifyrome ~ as.factor(region) + as.factor(year) + ratify.trend.single
  + common + mixed + islam + gdp.scaled + ruleoflaw + pre98conflict + ptssum.mean + medianIMR
  + democracy*multilataid.scaled, family = binomial(link = "logit"), method = "brglm.fit", data = data)
model4 <-  brglm(formula = ratifyrome ~ as.factor(region) + as.factor(year) + ratify.trend.single
  + common + mixed + islam + gdp.scaled + ruleoflaw + pre98conflict + ptssum.mean + medianIMR
  + democracy*dod.scaled, family = binomial(link = "logit"), method = "brglm.fit", data = data)
model11 <-  brglm(formula = ratifyrome ~ as.factor(region) + as.factor(year) + ratify.trend.country
  + common + mixed + islam + gdp.scaled + ruleoflaw + pre98conflict +  ptssum.mean + medianIMR
  + democracy*capital.scaled, family = binomial(link = "logit"), method = "brglm.fit", data = data)
model22 <-  brglm(formula = ratifyrome ~ as.factor(region) + as.factor(year) + ratify.trend.country
  + common + mixed + islam + gdp.scaled + ruleoflaw + pre98conflict  + ptssum.mean + medianIMR
  + democracy*demaid.scaled, family = binomial(link = "logit"), method = "brglm.fit", data = data)
model33 <-  brglm(formula = ratifyrome ~ as.factor(region) + as.factor(year) + ratify.trend.country
  + common + mixed + islam + gdp.scaled + ruleoflaw + pre98conflict + ptssum.mean + medianIMR
  + democracy*multilataid.scaled, family = binomial(link = "logit"), method = "brglm.fit", data = data,
    control.brglm=brglm.control(br.maxit=1000)) # Increase max iterations for this particular model fit.
model44 <-  brglm(formula = ratifyrome ~ as.factor(region) + as.factor(year) + ratify.trend.country 
  + common + mixed + islam + gdp.scaled + ruleoflaw + pre98conflict + ptssum.mean + medianIMR
  + democracy*dod.scaled, family = binomial(link = "logit"), method = "brglm.fit", data = data)

# Define some functions.

# A function to stop excess verbosity when running matchit().
quiet <- function(x) { 
  sink(tempfile()) 
  on.exit(sink()) 
  invisible(force(x)) 
 } 
     
# Multivariate normal simulations of the coefficients.
aut.slopes.lwd <- function(x){
  require(mvtnorm)
  betas <- summary(x)$coefficients[,1]
  variances <- as.matrix(vcov(x))
  sim.mat <- NULL
  sim.mat <- rbind(sim.mat, rmvn(100000, betas, variances))
  sim.vec <- sim.mat[,(ncol(sim.mat)-1)]
  estimates <- c(mean(sim.vec), sd(sim.vec), dnorm(mean(sim.vec)/sd(sim.vec)))
  invisible(estimates)
  }

dem.slopes.lwd <- function(x){
  require(mvtnorm)
  betas <- summary(x)$coefficients[,1]
  variances <- as.matrix(vcov(x))
  sim.mat <- NULL
  sim.mat <- rbind(sim.mat, rmvn(100000, betas, variances))
  summed <- rowSums(sim.mat[,(ncol(sim.mat)-1):ncol(sim.mat)])
  estimates <- c(mean(summed), sd(summed), dnorm(mean(summed)/sd(summed)))
  invisible(estimates)
  }

# A function to get model output stats for each model in Table 2
output.lwd <- function(x){
	mat <- rbind(aut.slopes.lwd(x), 
	             dem.slopes.lwd(x),
	             c(AIC(logLik(x)), NA, NA),
	             c(length(x$residuals), NA, NA)
	             )
	#mat[c(1:2),] <- round(mat[c(1:2),], 3)           
	rownames(mat) <- c("Coef. of Capital in Autocracies", "Coef. of Capital in Democracies", "Model AIC", "N")
	return(mat)
	}

# Table 2 results with unimputed data and listwise deletion.
col1 <- list(output.lwd(model2), output.lwd(model3), output.lwd(model4), output.lwd(model1))
col1 <- do.call(rbind, col1)
col2 <- list(output.lwd(model22), output.lwd(model33), output.lwd(model44), output.lwd(model11))
col2 <- do.call(rbind, col2)
H1table <- cbind(col1, col2)

# Remove objects that consume lots of memory and aren't needed now.
rm(model1, model2, model3, model4, model11, model22, model33, model44)

############################################################################################################
# Matching with **autocracies** alone
  aut <- data[data$democracy==0, ]
  
# Drop observations with missing capital.scaled and ratifyrome.
  aut <- aut[!is.na(aut$capital.scaled) & !(is.na(aut$ratifyrome)),]

# Discretize capital.scaled at median 
  aut$capital.dummy <- ifelse(aut$capital.scaled > quantile(aut$capital.scaled, 0.5), 1, 0)
  
# Define cutpoints for coarsened exact matching on continuous variables
  gdp.cut <- quantile(aut$gdp.scaled, seq(0,1,.5), na.rm = T)
  rol.cut <- quantile(aut$ruleoflaw, seq(0,1,.5), na.rm = T)
  imr.cut <- quantile(aut$medianIMR, seq(0,1,.5), na.rm = T)
  pts.cut <- quantile(aut$ptssum.mean, seq(0,1,.5), na.rm = T)

# Narrow the data set to model variables and drop all rows with NAs in any variable (i.e. do LWD.)
   data.list <- with(aut, data.frame(ratifyrome, gdp.scaled, ruleoflaw, capital.dummy, 
      common, mixed, islam, pre98conflict, region, ccode, lcode, year, quarter, medianIMR, 
      ptssum.mean, ratify.trend.country, ratify.trend.single))
   data.list <- data.list[complete.cases(data.list),]
   
   match.list <- quiet(matchit(capital.dummy ~ gdp.scaled + ruleoflaw + common + mixed + islam 
   + ptssum.mean + medianIMR + pre98conflict, data = data.list, method = "cem", 
   cutpoints = list(gdp.scaled = gdp.cut, ruleoflaw = rol.cut, ptssum.mean = pts.cut, medianIMR = imr.cut)))

  aut.match <- match.list # save it

#writeLines("")
#writeLines("# Print the N for completely observed rows of autocracies after listwise deletion.") 
#print(dim(data.list))
#writeLines("")
#writeLines("# Print N for models with matched autocracies after listwise deletion.")  
#print(aut.match)
  
# Logit with shared baselines. "Common" dropped due to no variation in sample.
  aut.shared <- glm(ratifyrome ~ capital.dummy + gdp.scaled + ruleoflaw  
    + mixed + islam + pre98conflict + ptssum.mean + medianIMR + ratify.trend.single, 
    family = binomial(link="logit"),  data = match.data(aut.match))

# Logit with country specific baselines. Mixed has to be dropped in order for model to converge.
  aut.country <- glm(ratifyrome ~ capital.dummy + gdp.scaled + ruleoflaw  
     + islam + pre98conflict + ptssum.mean + medianIMR + ratify.trend.country, 
    family = binomial(link="logit"), data = match.data(aut.match))

# Calculate sample average treatment effect on treated for list-wise deleted data, dropping binary variables because units have been exactly matched on them.
satt.ratify <- zelig(formula = ratifyrome ~ ratify.trend.country + gdp.scaled + ruleoflaw + islam + ptssum.mean + medianIMR + pre98conflict + capital.dummy, model = "logit", data = match.data(aut.match), cite = F) %>% ATT(treatment = "capital.dummy", treated = 1, num = 20000) %>% get_qi(qi="ATT", xvalue = "TE")

writeLines("")
writeLines("# Print the 95% interval plus 25th and is 75th percentiles (top) and median absolute deviation (bottom) the SATT of the high capital variable on accepting jurisdiction among AUTOCRACIES, using the matched, list-wise deleted data.")

print(signif(quantile(satt.ratify, c(.025, .25, .5, .75, .975)), digits = 2))
print(signif(mad(satt.ratify), digits = 2))

############################################################################################################
# Matching with **democracies** alone
  dem <- data[data$democracy==1, ]
  
# Drop observations with missing capital.scaled and ratifyrome.
  dem <- dem[!is.na(dem$capital.scaled) & !(is.na(dem$ratifyrome)),]

# Discretize capital.scaled at median 
  dem$capital.dummy <- ifelse(dem$capital.scaled > quantile(dem$capital.scaled, 0.5), 1, 0)
  
# Define cutpoints for coarsened exact matching on continuous variables
  gdp.cut <- quantile(dem$gdp.scaled, seq(0,1,.5), na.rm = T)
  rol.cut <- quantile(dem$ruleoflaw, seq(0,1,.5), na.rm = T)
  imr.cut <- quantile(dem$medianIMR, seq(0,1,.5), na.rm = T)
  pts.cut <- quantile(dem$ptssum.mean, seq(0,1,.5), na.rm = T)

# Narrow the data set to model variables and drop all rows with NAs in any variable (i.e. do LWD.)
   data.list <- with(dem, data.frame(ratifyrome, gdp.scaled, ruleoflaw, capital.dummy, 
      common, mixed, islam, pre98conflict, region, ccode, lcode, year, quarter, medianIMR, 
      ptssum.mean, ratify.trend.country, ratify.trend.single))
   data.list <- data.list[complete.cases(data.list),]
   

   match.list <- quiet(matchit(capital.dummy ~ gdp.scaled + ruleoflaw + common + mixed + islam 
     + ptssum.mean + medianIMR + pre98conflict, data = data.list, method = "cem", 
     cutpoints = list(gdp.scaled = gdp.cut, ruleoflaw = rol.cut, ptssum.mean = pts.cut, medianIMR = imr.cut)))

   dem.match <- match.list # save it

#writeLines("")
#writeLines("# Print the N for completely observed rows of democracies after listwise deletion.") 
#print(dim(data.list))
#writeLines("")
#writeLines("# Print N for models with matched democracies after listwise deletion.")  
#print(dem.match)

# Logit with shared baselines. medianIMR has to be dropped for model to converge.
  dem.shared <- glm(ratifyrome ~ capital.dummy + gdp.scaled + ruleoflaw + common 
    + mixed + pre98conflict + ptssum.mean + ratify.trend.single, 
    family = binomial(link = "logit"),  data = match.data(dem.match))
        
# Logit with country specific baselines. 
    #Multiple control vars have to be dropped in order for model to converge.
  dem.country <- glm(ratifyrome ~ capital.dummy + ruleoflaw + common 
    + pre98conflict + ratify.trend.country, family = binomial(link = "logit"), 
    data = match.data(dem.match))
    
 

######################################################################################################
# Functions for calculating estimates

simulate.capital.matched.lwd <- function(x){
  require(mvtnorm)
  betas <- summary(x)$coefficients[,1]
  variances <- as.matrix(vcov(x))
  sim.mat <- NULL
  sim.mat <- rbind(sim.mat, rmvn(100000, betas, variances))
  sim.vec <- sim.mat[,2]
  estimates <- c(mean(sim.vec), sd(sim.vec), dnorm(mean(sim.vec)/sd(sim.vec)))
  invisible(estimates)
  }
 
output.capital.matched.lwd <- function(x){
	mat <- rbind(simulate.capital.matched.lwd(x), 
	             c(AIC(logLik(x)), NA, NA),
	             c(length(x$residuals), NA, NA)
	             )
	#mat[c(1:2),] <- round(mat[c(1:2),], 3)              
	rownames(mat) <- c("Coef. of Capital", "Model AIC", "N")
	return(mat)
	}
	
######################################################################################################
# Get results of the models run on matched data, then add them to the table created above

  matchresults.col1 <- rbind(output.capital.matched.lwd(aut.shared), output.capital.matched.lwd(dem.shared))
  matchresults.col2 <- rbind(output.capital.matched.lwd(aut.country), output.capital.matched.lwd(dem.country))
  matched.results <- cbind(matchresults.col1, matchresults.col2)

# Package results.
  presented.table <- rbind(H1table, matched.results)
  
  colnames(presented.table) <- rep(c("Est.", "SE", "p"), 2)
  
  
writeLines("")
writeLines("Print the table of results for regression models of whether a leader accepts the jurisdiction of the ICC using the unimputed data with listwise deletion. The first column has models with a shared baseline hazard. The second row has models with country-specific baseline hazards. The first four rows have models with fixed effects & all control variables. Rows 1-4 feature main independent variable is DAC-EC aid, Multilateral aid, Multilateral loans, and Total aid and loans, in that order. Rows five and six use the matched data set using High Capital as the treatment variable. This table setup mirrors that of the corresponding table in the supplementary appendix.")
print(round(presented.table, digits = 3))

#suppressPackageStartupMessages(library(memisc, quietly = T))
#writeLines("")
#writeLines("# Reprint the table above for LaTex with 3 sig digits.") 
#  print(toLatex(presented.table, digits = 3)) # Print method is cleaner.

# Calculate further N statistics.

#writeLines("")
#writeLines("# Dimensions of the data set before listwise deletion")
#print(dim(data))
  
#writeLines("")
#writeLines("# Print N for regression models using the un-matched data and fixed effects after listwise deletion.")
#print(length(residuals(model2)))

#writeLines("")
#writeLines("# Print the N for democracies in the data set before listwise deletion.") 
#print(dim(data[data$democracy==1,]))

#writeLines("")
#writeLines("# Print the N for autocracies in the data set before listwise deletion.") 
#print(dim(data[data$democracy==0,]))




# End.